
# Load workflow settings into the global `config` dict. The rules in this file
# read the keys `outdir`, `workdir`, `logdir`, `koh_analysis` and `koh_data`.
configfile: 'config/v1.yaml'

# Container image declared for the workflow's rules.
# NOTE(review): Snakemake only runs jobs inside this image when container
# support is enabled on the command line (e.g. --use-singularity) -- confirm
# how the pipeline is launched.
singularity: "docker://alzhang/scrna-analysis-external-3.5:v1.0"

# Default target rule: requests the annotated Koh object under
# config['outdir'], which is the declared output of rule `koh_cluster`.
rule all:
    input:
        koh_annotated='{outdir}/koh/sce_annotated.rds'.format(outdir=config['outdir']),

# Preprocess the Koh dataset.
#
# Runs R/koh_preprocess.R with the configured cell types and declares the RDS
# file it is told to write (--outfname) under config['workdir'] as the output.
# The rule has no declared inputs.
rule koh_preprocess:
    output:
        '{workdir}/koh/sce_preprocess.rds'.format(workdir=config['workdir'])
    # NOTE(review): `name` is not referenced by the command below; presumably
    # it is a job label consumed by the cluster submission setup -- confirm.
    params:
        name='koh-preprocess',
        celltypes=config['koh_analysis']['celltypes'],
    log:
        '{logdir}/logs/koh/sce_preprocess.log'.format(logdir=config['logdir'])
    # NOTE(review): benchmark files are written directly under benchmarks/,
    # whereas logs go under logs/koh/.
    benchmark:
        '{logdir}/benchmarks/sce_preprocess.txt'.format(logdir=config['logdir'])
    # stdout and stderr are both captured in the log file. The redirect is
    # spelled `> file 2>&1` rather than the bash/csh-only `>& file` so that the
    # command also works when it is executed by a plain POSIX `sh`.
    shell:
        'Rscript R/koh_preprocess.R '
        '--koh_celltypes {params.celltypes} '
        '--outfname {output} '
        '> {log} 2>&1'

# Run CellAssign on the preprocessed Koh data.
#
# Runs R/koh_cellassign.R on the output of rule `koh_preprocess` together with
# the bulk RNA differential-expression file from config['koh_data'], and
# declares two outputs under config['workdir']: the cellassign RDS object
# (--outfname) and the rho table as TSV (--outrho).
rule koh_cellassign:
    input:
        koh_preprocessed='{workdir}/koh/sce_preprocess.rds'.format(workdir=config['workdir']),
        bulkrna_de=config['koh_data']['bulkrna_de'],
    output:
        koh_cellassign='{workdir}/koh/sce_cellassign.rds'.format(workdir=config['workdir']),
        cellassign_rho='{workdir}/koh/cellassign_rho.tsv'.format(workdir=config['workdir']),
    # NOTE(review): `name` is not referenced by the command below; presumably
    # it is a job label consumed by the cluster submission setup -- confirm.
    params:
        name='koh-cellassign',
        marker_quantile=config['koh_analysis']['marker_quantile'],
        celltypes=config['koh_analysis']['celltypes'],
    log:
        '{logdir}/logs/koh/sce_cellassign.log'.format(logdir=config['logdir'])
    benchmark:
        '{logdir}/benchmarks/sce_cellassign.txt'.format(logdir=config['logdir'])
    # NOTE(review): 20 threads are reserved for the job, but `{threads}` is not
    # passed on the command line; verify how the R script picks its core count.
    threads: 20
    # stdout and stderr are both captured in the log file. The redirect is
    # spelled `> file 2>&1` rather than the bash/csh-only `>& file` so that the
    # command also works when it is executed by a plain POSIX `sh`.
    shell:
        'Rscript R/koh_cellassign.R '
        '--sce {input.koh_preprocessed} '
        '--koh_bulkrna {input.bulkrna_de} '
        '--koh_celltypes {params.celltypes} '
        '--quantile_cutoff {params.marker_quantile} '
        '--conda_env r-tensorflow '
        '--outfname {output.koh_cellassign} '
        '--outrho {output.cellassign_rho} '
        '> {log} 2>&1'

# Run external clustering methods on Koh data.
#
# Runs R/koh_cluster.R on both outputs of rule `koh_cellassign` (the RDS object
# and the rho table) with the clustering methods listed in
# config['koh_analysis'], and declares the annotated RDS file under
# config['outdir'] as output. That file is the target requested by rule `all`.
rule koh_cluster:
    input:
        koh_cellassign='{workdir}/koh/sce_cellassign.rds'.format(workdir=config['workdir']),
        cellassign_rho='{workdir}/koh/cellassign_rho.tsv'.format(workdir=config['workdir']),
    output:
        koh_annotated='{outdir}/koh/sce_annotated.rds'.format(outdir=config['outdir']),
    # NOTE(review): `name` is not referenced by the command below; presumably
    # it is a job label consumed by the cluster submission setup -- confirm.
    params:
        name='koh-cluster',
        koh_cluster_methods=config['koh_analysis']['clustering_methods'],
    log:
        '{logdir}/logs/koh/sce_cluster.log'.format(logdir=config['logdir'])
    benchmark:
        '{logdir}/benchmarks/sce_cluster.txt'.format(logdir=config['logdir'])
    # NOTE(review): 20 threads are reserved for the job, but `{threads}` is not
    # passed on the command line; verify how the R script picks its core count.
    threads: 20
    # stdout and stderr are both captured in the log file. The redirect is
    # spelled `> file 2>&1` rather than the bash/csh-only `>& file` so that the
    # command also works when it is executed by a plain POSIX `sh`.
    shell:
        'Rscript R/koh_cluster.R '
        '--sce {input.koh_cellassign} '
        '--rho {input.cellassign_rho} '
        '--clustering_methods {params.koh_cluster_methods} '
        '--conda_env r-tensorflow '
        '--outfname {output.koh_annotated} '
        '> {log} 2>&1'

